In [1]:
# Instalar las versiones específicas de dash y werkzeug
# !pip install dash==2.14.2 werkzeug==2.2.3
In [2]:
# Importar librerías necesarias
import pandas as pd
import plotly.express as px
from pathlib import Path
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import numpy as np
# Silence FutureWarning messages so they do not clutter the cell outputs.
import warnings
warnings.simplefilter("ignore", category=FutureWarning)
# Load the housing dataset and preview its first two rows.
df = pd.read_csv("../data/USAHousingDataset.csv")
df.head(2)
Out[2]:
| date | price | bedrooms | bathrooms | sqft_living | sqft_lot | floors | waterfront | view | condition | sqft_above | sqft_basement | yr_built | yr_renovated | street | city | statezip | country | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2014-05-09 00:00:00 | 376000.0 | 3.0 | 2.00 | 1340 | 1384 | 3.0 | 0 | 0 | 3 | 1340 | 0 | 2008 | 0 | 9245-9249 Fremont Ave N | Seattle | WA 98103 | USA |
| 1 | 2014-05-09 00:00:00 | 800000.0 | 4.0 | 3.25 | 3540 | 159430 | 2.0 | 0 | 0 | 3 | 3540 | 0 | 2007 | 0 | 33001 NE 24th St | Carnation | WA 98014 | USA |
Prueba primera imagen¶
In [3]:
# Select Plotly's "notebook" renderer as the default so that fig.show()
# renders figures inline in the notebook.
from plotly import io as pio
pio.renderers.default = "notebook"
In [4]:
# Normalize the schema: strip whitespace from column headers, coerce the two
# numeric columns (unparseable values become NaN) and label missing cities.
df.columns = df.columns.str.strip()
for column in ("price", "sqft_living"):
    df[column] = pd.to_numeric(df[column], errors="coerce")
df["city"] = df["city"].fillna("Unknown")
In [5]:
# Interactive scatter of price against living area, colored by city.
axis_labels = {"sqft_living": "Área (pies cuadrados - sqft)", "price": "Precio"}
fig = px.scatter(
    data_frame=df,
    x="sqft_living",
    y="price",
    color="city",
    labels=axis_labels,
    title="Precio Vs. Pies Cuadrados por Ciudad",
    template="plotly_white",
)
In [6]:
# In-figure dropdown for filtering by city. The first entry ("Todas") marks
# one visibility flag per distinct city as True.
n_city_traces = len(df["city"].unique())
buttons = [
    {
        "label": "Todas",
        "method": "update",
        "args": [{"visible": [True] * n_city_traces}],
    }
]
In [7]:
# Add one dropdown entry per city (trial run on the scatter plot), then attach
# the menu to the figure and render it.
# The distinct-city array is computed once and reused for every visibility mask
# instead of being recomputed on each iteration.
# NOTE(review): the mask assumes fig.data holds one trace per city in the same
# order as Series.unique() returns them — confirm against px.scatter(color="city").
city_order = df.city.unique()
for city in city_order:
    # True only at the position of the selected city.
    vis = [c == city for c in city_order]
    buttons.append(
        dict(label=city,
             method="update",
             args=[{"visible": vis}])
    )
fig.update_layout(
    updatemenus=[dict(active=0, buttons=buttons, x=1.1, y=1.1)]
)
fig.show()
Armando imágenes para el tablero¶
In [8]:
# Derived columns and dropdown options needed by the dashboard.
df["city"] = df["city"].fillna("Unknown")
# Zero living areas are replaced by NaN before dividing.
df["price_per_sqft"] = df["price"].div(df["sqft_living"].replace({0: np.nan}))
# Dropdown options: the "all cities" label followed by the sorted city names.
all_label = "Todas"
cities = sorted(df["city"].dropna().unique().astype(str))
options = [all_label, *cities]
# Overall median sale price, used as the dashboard KPI.
median_global_full = float(df["price"].median())
median_global_full
Out[8]:
460000.0
In [9]:
# Dictionary of plot-ready lists, keyed by dropdown option (city name or "Todas").
city_payload = {}
# Summary table: the ten cities with the most listings (ranked by count `n`),
# together with each city's median price.
res_global = (
    df.groupby("city")
    .agg(n=("price", "count"), med=("price", "median"))
    .sort_values("n", ascending=False)
    .head(10)
    .reset_index()
)
table_header = list(res_global.columns)
table_cells = [res_global[c].tolist() for c in res_global.columns]
# Collect the series for every dropdown option.
for city in options:
    if city == all_label:
        d = df.copy()
    else:
        d = df[df['city'] == city]
    # Scatter of price against living area; missing values become empty strings.
    scatter_x = d['sqft_living'].fillna('').astype(object).tolist()
    scatter_y = d['price'].fillna('').astype(object).tolist()
    # Hover data: one [city, bedrooms] pair per row.
    custom = np.stack([d['city'].astype(str).tolist(), d.get('bedrooms', pd.Series([""]*len(d))).astype(str).tolist()], axis=1).tolist() if len(d) else [[],[]]
    # Price histogram
    hist_x = d['price'].fillna('').astype(object).tolist()
    # Time series of the monthly median price ('ME' = month-end bins)
    if 'date' in d.columns and d['date'].notna().any():
        d_dates = d.dropna(subset=['date','price']).copy()
        d_dates['date'] = pd.to_datetime(d_dates['date'])
        ts = d_dates.set_index('date').resample('ME')['price'].median().reset_index()
        ts_x = ts['date'].tolist()
        ts_y = ts['price'].tolist()
    else:  # no usable dates for this option
        ts_x, ts_y = [], []
    # Box plot of price by number of bedrooms.
    # fillna must run BEFORE astype(str): once cast to str a missing value is the
    # literal text 'nan', so a later fillna('') would never replace it.
    box_x = d['bedrooms'].fillna('').astype(str).tolist() if 'bedrooms' in d.columns else []
    box_y = d['price'].fillna('').astype(object).tolist()
    # Store this option's data
    city_payload[city] = {
        "scatter_x": scatter_x,
        "scatter_y": scatter_y,
        "customdata": custom,
        "hist_x": hist_x,
        "ts_x": ts_x,
        "ts_y": ts_y,
        "box_x": box_x,
        "box_y": box_y,
        # if you want the table per city uncomment the following:
        # "table_cells": [ ... ]
    }
display(res_global)
| city | n | med | |
|---|---|---|---|
| 0 | Seattle | 1415 | 488000.0 |
| 1 | Renton | 261 | 345000.0 |
| 2 | Bellevue | 260 | 727016.0 |
| 3 | Redmond | 209 | 640000.0 |
| 4 | Kent | 167 | 283200.0 |
| 5 | Kirkland | 166 | 523500.0 |
| 6 | Issaquah | 162 | 561000.0 |
| 7 | Auburn | 162 | 268500.0 |
| 8 | Sammamish | 158 | 662500.0 |
| 9 | Federal Way | 131 | 263000.0 |
In [10]:
# Dashboard canvas: a 3x2 grid whose bottom-left cell hosts a table while the
# other five cells are cartesian ("xy") panels.
panel_titles = (
    "Precio Mediano de Vivienda en USA",
    "Precio Vs. Area",
    "Distribución de Precios",
    "Precio por Tiempo",
    "Top Ciudades con precios más altos",
    "Precio mediano Vs. Año de construcción",
)
fig = make_subplots(
    rows=3,
    cols=2,
    specs=[
        [{"type": "xy"}, {"type": "xy"}],
        [{"type": "xy"}, {"type": "xy"}],
        [{"type": "table"}, {"type": "xy"}],
    ],
    subplot_titles=panel_titles,
    vertical_spacing=0.10,
    horizontal_spacing=0.10,
)
In [11]:
# KPI: overall median sale price, anchored at the top-left in paper coordinates.
# Plotly text uses <br> for line breaks; a "\n" is not rendered as a new line.
annotation_kpi = dict(
    text=f"Precio mediano — General<br>${median_global_full:,.0f}",
    x=0.06, y=0.96, xref='paper', yref='paper', xanchor='left', yanchor='top',
    showarrow=False, font=dict(size=20, color="#0b3b59"), align="left"
)
# make_subplots stores the subplot titles as layout annotations, so the KPI is
# appended with add_annotation. Assigning `annotations=[annotation_kpi]` through
# update_layout would replace the whole list and erase those titles.
fig.add_annotation(**annotation_kpi)
fig.show()
In [12]:
# Scatter panel (row 1, col 2), initially filled with the all-cities payload.
init = city_payload[all_label]
scatter = go.Scattergl(
    x=init["scatter_x"],
    y=init["scatter_y"],
    mode='markers',
    marker={"size": 6, "opacity": 0.7},
    customdata=init["customdata"],
    # customdata rows are [city, bedrooms] pairs.
    hovertemplate="City: %{customdata[0]}<br>Beds: %{customdata[1]}<br>Precio: %{y:$,.0f}<extra></extra>",
)
fig.add_trace(scatter, row=1, col=2)
fig.update_xaxes(title_text="Área (pies cuadrados)", row=1, col=2)
fig.update_yaxes(title_text="Precio (USD)", row=1, col=2)
fig.show()
In [13]:
# Price distribution panel (row 2, col 1), using at most 35 bins.
hist = go.Histogram(
    x=init["hist_x"],
    nbinsx=35,
    opacity=0.75,
)
fig.add_trace(hist, row=2, col=1)
fig.update_xaxes(title_text="Precio (USD)", row=2, col=1)
fig.show()
In [14]:
# Monthly median price over time (row 2, col 2).
ts_line = go.Scatter(
    x=init["ts_x"],
    y=init["ts_y"],
    mode='lines+markers',
)
fig.add_trace(ts_line, row=2, col=2)
fig.update_xaxes(title_text="Fecha de venta", row=2, col=2)
fig.update_yaxes(title_text="Precio mediano (USD)", row=2, col=2)
fig.show()
In [15]:
# Summary table of the top-10 cities (row 3, col 1); the cell values come from
# table_cells, one list per column.
table_header_style = dict(
    values=["Ciudad", "Cantidad de propiedades", "Precio mediano (USD)"],
    fill_color="lightgray",
    font=dict(size=13, color="#0b3b59", family="Arial Black"),
    align="center",
)
table_cell_style = dict(
    values=table_cells,
    # Per-column number formats: city as-is, count with thousands separator, price as currency.
    format=[None, ",", "$,.0f"],
    align="center",
    font=dict(size=12),
)
table = go.Table(header=table_header_style, cells=table_cell_style)
fig.add_trace(table, row=3, col=1)
fig.show()
In [16]:
# Median price by construction year (row 3, col 2).
# Keep only rows with a positive construction year, then take the median price per year.
df_valid = df.loc[df['yr_built'] > 0].copy()
trend = df_valid.groupby("yr_built")["price"].median().reset_index()
# Line trace of the yearly medians.
age_line = go.Scatter(
    x=trend["yr_built"],
    y=trend["price"],
    mode="lines+markers",
    marker={"size": 6},
    line={"width": 2, "color": "#1f77b4"},
    hovertemplate="Año: %{x}<br>Precio mediano: $%{y:,.0f}<extra></extra>",
)
fig.add_trace(age_line, row=3, col=2)
fig.update_xaxes(title_text="Año de construcción", row=3, col=2)
fig.update_yaxes(title_text="Precio mediano (USD)", row=3, col=2)
fig.show()
Ajustando tablero final¶
In [17]:
# Librerías necesarias
import pandas as pd
import numpy as np
from pathlib import Path
from plotly.subplots import make_subplots
import plotly.graph_objects as go
# Prepare the derived columns and dropdown options used by the final dashboard.
df['city'] = df['city'].fillna("Unknown")
# Zero living areas are replaced by NaN before dividing.
df['price_per_sqft'] = df['price'] / df['sqft_living'].replace({0: np.nan})
# City list for the dropdown, preceded by the "all cities" option.
cities = sorted(df['city'].dropna().unique().astype(str))
all_label = "Todas"
options = [all_label] + cities
# Overall median sale price (dashboard KPI).
median_global_full = float(df['price'].median())
# Dictionary of plot-ready lists, keyed by dropdown option.
city_payload = {}
# Summary table: the ten cities with the most listings (ranked by count `n`)
# and each city's median price.
res_global = df.groupby("city").agg(n=("price","count"), med=("price","median")).sort_values("n", ascending=False).head(10).reset_index()
table_header = list(res_global.columns)
table_cells = [res_global[c].tolist() for c in res_global.columns]
# Collect the series for every dropdown option.
for city in options:
    if city == all_label:
        d = df.copy()
    else:
        d = df[df['city'] == city]
    # Scatter of price against living area; missing values become empty strings.
    scatter_x = d['sqft_living'].fillna('').astype(object).tolist()
    scatter_y = d['price'].fillna('').astype(object).tolist()
    # Hover data: one [city, bedrooms] pair per row.
    custom = np.stack([d['city'].astype(str).tolist(), d.get('bedrooms', pd.Series([""]*len(d))).astype(str).tolist()], axis=1).tolist() if len(d) else [[],[]]
    # Price histogram
    hist_x = d['price'].fillna('').astype(object).tolist()
    # Time series of the monthly median price ('ME' = month-end bins)
    if 'date' in d.columns and d['date'].notna().any():
        d_dates = d.dropna(subset=['date','price']).copy()
        d_dates['date'] = pd.to_datetime(d_dates['date'])
        ts = d_dates.set_index('date').resample('ME')['price'].median().reset_index()
        ts_x = ts['date'].tolist()
        ts_y = ts['price'].tolist()
    else:
        ts_x, ts_y = [], []
    # Median price by construction year (rows with a positive year and a known price)
    d_year = d[(d['yr_built'].notna()) & (d['yr_built'] > 0) & (d['price'].notna())].copy()
    if len(d_year):
        trend_year = d_year.groupby("yr_built")["price"].median().reset_index().sort_values("yr_built")
        yr_x = trend_year['yr_built'].tolist()
        yr_y = trend_year['price'].tolist()
    else:
        yr_x, yr_y = [], []
    # Box-plot inputs are computed here for the current option. Without these two
    # lines the dictionary below would either raise NameError on a fresh kernel or
    # store whatever box_x/box_y an earlier cell left behind, identical for every city.
    # fillna runs before astype(str) so missing values become '' instead of 'nan'.
    box_x = d['bedrooms'].fillna('').astype(str).tolist() if 'bedrooms' in d.columns else []
    box_y = d['price'].fillna('').astype(object).tolist()
    # Store this option's data
    city_payload[city] = {
        "scatter_x": scatter_x,
        "scatter_y": scatter_y,
        "customdata": custom,
        "hist_x": hist_x,
        "ts_x": ts_x,
        "ts_y": ts_y,
        "box_x": box_x,
        "box_y": box_y,
        "yr_x": yr_x,
        "yr_y": yr_y
    }
# Build the dashboard canvas: a 3x2 grid with a table in the bottom-left cell.
fig = make_subplots(
    rows=3, cols=2,
    specs=[
        [{"type": "xy"}, {"type": "xy"}],
        [{"type": "xy"}, {"type": "xy"}],
        [{"type": "table"}, {"type": "xy"}]
    ],
    subplot_titles=(
        "Precio Mediano de Vivienda en USA",
        "Precio Vs. Area",
        "Distribución de Precios",
        "Precio por Tiempo",
        "Top Ciudades con precios más altos",
        "Precio mediano Vs. Año de construcción"
    ),
    vertical_spacing=0.10,
    horizontal_spacing=0.10
)
# make_subplots stores the subplot titles as layout annotations. Snapshot them as
# plain dicts so that every `annotations=[...]` assignment below (the initial
# layout and each dropdown button) re-includes them instead of erasing the titles.
title_annotations = [ann.to_plotly_json() for ann in fig.layout.annotations]


def build_kpi_annotation(scope_label, median_price):
    """Return the KPI annotation dict for one dropdown selection.

    Args:
        scope_label: Text shown after "Precio mediano —" (e.g. "General" or a city).
        median_price: Median price, formatted as whole dollars.

    Returns:
        dict: Annotation properties anchored at the top-left in paper coordinates.
    """
    return dict(
        text=f"<b>Precio mediano — {scope_label}</b><br><span style='font-size:26px;'>${median_price:,.0f}</span>",
        x=0.03, y=0.96, xref='paper', yref='paper', xanchor='left', yanchor='top',
        showarrow=False, font=dict(size=20, color="#0b3b59"), align="left"
    )


# Caption displayed next to the city dropdown.
filter_label_annotation = dict(
    text="<b>Filtro por ciudad:</b>",
    x=0.83, y=1.055,
    xref="paper", yref="paper",
    xanchor="right", yanchor="top",
    showarrow=False,
    font=dict(size=14, color="#0b3b59"),
    align="right"
)
# KPI for the initial ("Todas") view.
annotation_kpi = build_kpi_annotation("General", median_global_full)
# Initial scatter panel (trace 0), drawn from the all-cities payload.
init = city_payload[all_label]
scatter = go.Scattergl(
    x=init["scatter_x"], y=init["scatter_y"], mode='markers',
    marker=dict(size=6, opacity=0.7),
    customdata=init["customdata"],
    hovertemplate="City: %{customdata[0]}<br>Beds: %{customdata[1]}<br>Precio: %{y:$,.0f}<extra></extra>"
)
fig.add_trace(scatter, row=1, col=2)
fig.update_xaxes(title_text="Área (pies cuadrados)", row=1, col=2)
fig.update_yaxes(title_text="Precio (USD)", row=1, col=2)
# Histogram (trace 1)
hist = go.Histogram(x=init["hist_x"], nbinsx=35, opacity=0.75)
fig.add_trace(hist, row=2, col=1)
fig.update_xaxes(title_text="Precio (USD)", row=2, col=1)
fig.update_yaxes(title_text="Cantidad de propiedades", row=2, col=1)
# Time series (trace 2)
ts_line = go.Scatter(x=init["ts_x"], y=init["ts_y"], mode='lines+markers')
fig.add_trace(ts_line, row=2, col=2)
fig.update_xaxes(title_text="Fecha de venta", row=2, col=2)
fig.update_yaxes(title_text="Precio mediano (USD)", row=2, col=2)
# Top-10 cities table (trace 3)
table = go.Table(
    header=dict(
        values=["Ciudad", "Cantidad de propiedades", "Precio mediano (USD)"],
        fill_color="lightgray",
        font=dict(size=13, color="#0b3b59", family="Arial Black"),
        align="center"
    ),
    cells=dict(
        values=table_cells,
        format=[None, ",", "$,.0f"],
        align="center",
        font=dict(size=12)
    )
)
fig.add_trace(table, row=3, col=1)
# Median price by construction year (trace 4)
age_line = go.Scatter(
    x=init["yr_x"],
    y=init["yr_y"],
    mode="lines+markers",
    marker=dict(size=6),
    line=dict(width=2, color="#1f77b4"),
    hovertemplate="Año: %{x}<br>Precio mediano: $%{y:,.0f}<extra></extra>"
)
fig.add_trace(age_line, row=3, col=2)
fig.update_xaxes(title_text="Año de construcción", row=3, col=2)
fig.update_yaxes(title_text="Precio mediano (USD)", row=3, col=2)
# Build the dropdown buttons that swap in each city's data.
buttons = []
total_traces = len(fig.data)


def build_update_args_for_city(payload):
    """Return the per-trace data update for one dropdown option.

    Each list has one slot per trace in ``fig.data`` (in the order the traces
    were added above); slots that do not apply to a trace stay ``None``.

    Args:
        payload: One entry of ``city_payload``.

    Returns:
        dict: Keys "x", "y", "customdata" and "cells.values".
    """
    x_list = [None] * total_traces
    y_list = [None] * total_traces
    custom_list = [None] * total_traces
    cells_list = [None] * total_traces
    # scatter
    x_list[0] = payload["scatter_x"]
    y_list[0] = payload["scatter_y"]
    custom_list[0] = payload["customdata"]
    # hist
    x_list[1] = payload["hist_x"]
    # time series
    x_list[2] = payload["ts_x"]
    y_list[2] = payload["ts_y"]
    # table (always the global top-10, independent of the selected city)
    cells_list[3] = table_cells
    # trend yr
    x_list[4] = payload["yr_x"]
    y_list[4] = payload["yr_y"]
    return {"x": x_list, "y": y_list, "customdata": custom_list, "cells.values": cells_list}


# Create one button per dropdown option.
for city in options:
    payload = city_payload[city]
    args0 = build_update_args_for_city(payload)
    if city == all_label:
        kpi = build_kpi_annotation("General", median_global_full)
    else:
        med = float(df[df['city'] == city]['price'].median())
        kpi = build_kpi_annotation(city, med)
    # A layout update replaces the whole annotations list, so each button resends
    # the subplot titles and the filter caption together with the refreshed KPI.
    args1 = {"annotations": [*title_annotations, kpi, filter_label_annotation]}
    buttons.append(dict(label=city, method="update", args=[args0, args1]))
# Apply the dropdown, annotations and overall styling.
fig.update_layout(
    updatemenus=[dict(
        active=0, buttons=buttons,
        x=0.98, y=1.06,
        xanchor="right", yanchor="top",
        showactive=True
    )],
    # Subplot titles + KPI (top-left) + caption next to the dropdown.
    annotations=[*title_annotations, annotation_kpi, filter_label_annotation],
    autosize=False, width=1200, height=920,
    title=dict(
        text="<b><span style='color:#0b3b59;'>🇺🇸 DASHBOARD – USA HOUSING 🇺🇸</span></b>",
        x=0.5, xanchor="center", font=dict(size=24)
    ),
    showlegend=False,
    template="plotly_white",
    margin=dict(t=120, l=60, r=120, b=40)
)
# Show the dashboard
fig.show()
Incluyendo ingreso de variables para predecir¶
In [18]:
from IPython.display import display, HTML
import json
# Render the Plotly figure as an HTML fragment (not a full page), with
# include_plotlyjs set to 'cdn'.
html_plot = fig.to_html(full_html=False, include_plotlyjs='cdn')
# Input fields offered by the prediction form.
numeric_fields = [
    "bedrooms", "bathrooms", "sqft_living", "sqft_lot",
    "floors", "waterfront", "view", "condition", "sqft_above",
    "sqft_basement", "yr_built", "yr_renovated",
]
string_fields = ["street", "statezip"]
# One labelled <input> row per field: numeric fields first, then text fields.
numeric_rows = [
    f"<div class='field-row'>"
    f"<label for='{f}' class='lbl'>{f}</label>"
    f"<input type='number' id='{f}' class='inp num' />"
    f"</div>"
    for f in numeric_fields
]
text_rows = [
    f"<div class='field-row'>"
    f"<label for='{f}' class='lbl'>{f}</label>"
    f"<input type='text' id='{f}' class='inp txt' />"
    f"</div>"
    for f in string_fields
]
inputs_html = "".join(numeric_rows + text_rows)
# JavaScript statements that copy each input into `payload`: numeric fields are
# converted with Number() (null when empty), text fields default to ''.
js_lines = [
    f"payload['{f}'] = document.getElementById('{f}').value ? Number(document.getElementById('{f}').value) : null;"
    for f in numeric_fields
]
js_lines.extend(
    f"payload['{f}'] = document.getElementById('{f}').value || '';"
    for f in string_fields
)
js_payload = "\n ".join(js_lines)
# Placeholder prediction: the overall median price.
median_mock = float(df['price'].median())
# Build the complete HTML: the Plotly figure in the left panel and the
# prediction form in the right panel, styled to resemble the original dashboard.
# Inside the f-string, doubled braces ({{ }}) are literal CSS/JS braces; the
# single-brace fields {html_plot}, {inputs_html}, {js_payload} and {median_mock}
# are interpolated from the variables of the same name.
# The generated generarPrediccion() collects the inputs into `payload`, exposes it
# as window.payload, and displays the mock prediction (the median price).
html = f"""
<style>
/* Layout */
.dashboard-wrap {{ display:flex; gap:24px; font-family: 'Trebuchet MS', Arial, sans-serif; align-items:flex-start; }}
.left-panel {{ flex: 3; min-width: 820px; }}
.right-panel {{
flex: 1;
max-width: 360px;
background: #fafafa;
border-left: 8px solid #222;
padding:18px;
border-radius:6px;
height:884px;
overflow:auto;
box-shadow: 0 2px 6px rgba(0,0,0,0.03);
}}
/* Header / KPI area (aplica dentro del plotly) */
.header-title {{ text-align:center; font-weight:700; color:#0b3b59; font-size:24px; margin-bottom:6px; }}
/* Form styling */
.field-row {{ margin-bottom:8px; display:flex; flex-direction:column; }}
.lbl {{ font-size:12px; color:#7d868f; margin-bottom:4px; text-transform:lowercase; }}
.inp {{
padding:2px 4px;
height: 24px;
border:1px solid #cfcfcf;
border-radius:4px;
outline:none;
font-size:13px;
box-sizing:border-box;
}}
.inp:focus {{ border-color:#7fbf7f; box-shadow: 0 0 0 3px rgba(127,191,127,0.08); }}
/* Numeric narrower inputs */
.inp.num {{ width:100%; }}
/* Button */
.btn-predict {{
background:#2ca02c; color:white; border:none; padding:10px 14px; width:100%; border-radius:6px;
font-weight:600; font-size:14px; cursor:pointer; margin-top:10px;
}}
.btn-predict:hover {{ opacity:0.95; }}
/* Resultado box */
.result-box {{
margin-top:12px;
padding:6px 10px;
background:white;
border:1px solid #dcdcdc;
border-radius:6px;
font-size:14px;
color:#0b3b59;
height:auto;
min-height: auto;
line-height: 1.2;
}}
/* Sección labels */
.side-title {{ font-weight:700; color:#0b3b59; margin-bottom:6px; font-size:16px; }}
.side-sub {{ color:#7d868f; font-size:12px; margin-bottom:10px; }}
/* Ajustes responsive */
@media (max-width: 1200px) {{
.left-panel {{ min-width: 680px; }}
.right-panel {{ max-width:320px; }}
}}
</style>
<div class="dashboard-wrap">
<div class="left-panel">
<!-- aquí se inserta la visualización de plotly -->
{html_plot}
</div>
<div class="right-panel">
<div class="side-title">Predicción</div>
<div class="side-sub">Ingresa los valores y genera la predicción</div>
<!-- inputs -->
<div id="inputs-area">
{inputs_html}
</div>
<button class="btn-predict" onclick="generarPrediccion()">Generar predicción</button>
<hr style="margin:12px 0;">
<div style="font-size:13px; color:#7d868f; margin-bottom:6px;">Resultado</div>
<div id="resultado" class="result-box"><i>Aún no se ha generado predicción.</i></div>
</div>
</div>
<script>
function generarPrediccion() {{
let payload = {{}};
{js_payload}
// Guardar globalmente para inspeccionar con window.payload desde la consola
window.payload = payload;
// Validaciones básicas: ejemplo, convertir NaN a null y chequeo simple
for (let k in payload) {{
if (typeof payload[k] === 'number' && isNaN(payload[k])) payload[k] = null;
}}
// Mock de predicción precio
let pred = {median_mock};
// Mostrar resultado con formato similar a dashboard
document.getElementById('resultado').innerHTML = "<div style='font-weight:700;color:#0b3b59;font-size:16px;'>$" + pred.toLocaleString()
+ "</div><div style='color:#6b6f75;font-size:12px;margin-top:6px;'></div>";
console.log("Payload (listo para backend):", payload);
}}
</script>
"""
# Render the assembled HTML as the cell output.
display(HTML(html))
Predicción
Ingresa los valores y genera la predicción
Resultado
Aún no se ha generado predicción.
In [ ]: